rm(list=ls())
gc()
require(foreign)
require(ineq)
require(dplyr)
library(Hmisc)
require(corrplot)
require(systemfit)
require(cem)
require(BayesTree)
require(stargazer)
require(texreg)
require(igraph)
require(plotrix)

setwd(paste(substr(getwd(),1,regexpr("Dropbox",getwd())[1]-1),"Dropbox/coauthors/AMT_Bisbee_Larson/bisbee_larson_replication_final",sep=""))
source("./tools/leg2.R")

###################################################################################
##
##  File Name:    chars_results.R
##
##  Input Files:  chars_data.dta
##                chars_diff.dta
##                data_prepped.dta
##  Output Files: chars_regs.tex                  (Table 3)
##                SI_chars_regs.pdf               (SI Figure 1)
##                chars_adjpvals.pdf              (Figure 4)
##                structure-diff-elicited.pdf     (Figure 6)
##                structure-diff-allchars.pdf     (Figure 7)
##
##  Purpose: This file conducts free-step down resampling to adjust statistical 
##            inference for multiple comparisons and generate Figures 4 and SI 
##            Figure 1. In addition, this file creates Table 3 and uses the dinner
##            table seating question to explore structure differences in Figures 6
##            and 7. Note that the chars_adjpvals.pdf (Figure 4) may differ slightly 
##            in appearance from the figure in the published paper. This is due to 
##            the randomness associated with the jittered points which are included
##            for visual clarity. The substantive conclusions should obtain despite 
##            these small differences. 
##
##############################################################################################

specify_decimal <- function(x, k) format(round(x, k), nsmall=k)
startup <- function(x, out=NULL, ...){
  undo <- gsub("\\\\textasteriskcentered", "*", stargazer(x))#, ...))
  restar <- gsub("* * *", "${}^{***}$", undo, fixed = TRUE)
  restar <- gsub("* *", "${}^{**}$", restar, fixed = TRUE)
  restar <- gsub("* ", "${}^{*}$", restar, fixed = TRUE)
  restar <- gsub("\\textbackslash","\\",restar,fixed = TRUE)
  restar <- gsub("\\ textit\\{","\\textit{",restar,fixed = TRUE)
  restar <- gsub("\\}","}",restar,fixed = TRUE)
  if(!is.null(out)) cat(restar, file = out, sep="\n")
  restar
}

getwd()

######################################################################################
# chars_regs.tex: Table 3
######################################################################################
dat <- read.dta("./characteristics/chars_data.dta")
dat <- dat %>% filter(complete.cases(.)) %>% select(-contains("donate")) %>% mutate(std_interaction_clique = std_interaction_clique *-1,std_interaction_weakest = std_interaction_weakest * -1,std_interaction_weak = std_interaction_weak * -1,std_interaction_strong = std_interaction_strong *-1,std_interaction_strongest = std_interaction_strongest*-1)
# Prep work
controls <- names(dat[,which(grepl("^dem_",colnames(dat)))])
todrop <- c(names(dat[,which(grepl("^std_",colnames(dat)))]),
            names(dat[,which(grepl("^dem_",colnames(dat)))]),
            names(dat[,which(grepl("^tie_",colnames(dat)))]))
cem.dat <- cem(treatment = "online",data=dat,drop=todrop)
trees <- bart(x.train = dat %>% select(which(colnames(dat) %in% names(dat[,which(grepl("^dem_",colnames(dat)))]))),y.train = dat$online,ndpost=1000)
p.score <- pnorm(colMeans(trees$yhat.train))
trt <- dat$online==1
ipw <- trt + (1-trt)/(1-p.score)


outcomes <- c(names(dat[,which(grepl("^std_",colnames(dat)))]))

# Big Table 
big.table <- array(NA,dim = c(34,5))
rownames(big.table) <- c("Job Search","","Contrib. to Entrep.","","Garner Conts.","","Pers. Gain (% $100)","",
                         "Group Gain (% $100)","","Political Hom.","","Religious Hom.","","Education Hom.",
                         "","Class Hom.","","Pers. Crisis","","Pers. Success","","Prof. Crisis",
                         "","Prof. Success","","Pref. Interaction","","Tie Strength","",
                         "Least in Common","","Most in Common","")
colnames(big.table) <- c("Clique","Weakest","Weak","Strong","Strongest")

full.forms <- list(NA)
j = k = 1
for(tie in c("clique","weakest","weak","strong","strongest")) {
  forms.biv <- list(NA)
  biv.outs <- list(NA)
  outcomes.tie <- outcomes[which(grepl(tie,outcomes))]
  
  if(tie == "weak" | tie == "strong") {
    outcomes.tie <- outcomes.tie[which(!(grepl("est",outcomes.tie)))]  
  }
  
  for(i in 1:(length(outcomes.tie))) {
    forms.biv[[i]] <- as.formula(paste(outcomes.tie[i],"~ online"))
    full.forms[[j]] <- forms.biv[[i]]
    biv.outs[[i]] <- lm(forms.biv[[i]],data=dat)
    j = j+1
  }
  
  coefs <- sapply(1:17,function(x) summary(biv.outs[[x]])$coefficients[2,1])
  ses <- sapply(1:17,function(x) summary(biv.outs[[x]])$coefficients[2,2])
  pvals <- sapply(1:17,function(x) summary(biv.outs[[x]])$coefficients[2,4])
  stars <- ifelse(pvals < .01,"***",ifelse(pvals < .05,"**",ifelse(pvals < .1,"*","")))
  
  big.table[seq(1,34,by=2),k] <- paste(specify_decimal(coefs,3),stars,sep="")
  big.table[seq(2,34,by=2),k] <- paste("(",specify_decimal(ses,3),")",sep="")
  k = k+1
}

ords <- match(c("Political Hom.","Religious Hom.","Education Hom.","Class Hom.","Least in Common","Most in Common",
                "Contrib. to Entrep.","Garner Conts.","Pers. Gain (% $100)","Group Gain (% $100)",
                "Pers. Crisis","Pers. Success","Pref. Interaction","Tie Strength",
                "Job Search","Prof. Crisis","Prof. Success"),rownames(big.table))
newords <- NA
for(x in ords) {
  newords <- c(newords,x,x+1)
}
newords <- newords[-1]

big.table <- big.table[newords,]
startup(big.table,keep.stat = c("N","rsq"),out = "./2_Tables/chars_regs.tex")

######################################################################################
# Free-step down resampling
######################################################################################
plot.table <- array(NA,dim = c(34,20))
pval.ctr <- pval.bart <- pval.cem <- pval.biv <- array(NA,dim = c(17,5))
boot.ctr.t <- boot.cem.t <- boot.bart.t <- NA
rownames(plot.table) <- c("Job Search","","Contrib. to Entrep.","","Garner Conts.","","Pers. Gain (% $100)","",
                          "Group Gain (% $100)","","Political Hom.","","Religious Hom.","","Education Hom.",
                          "","Class Hom.","","Pers. Crisis","","Pers. Success","","Prof. Crisis",
                          "","Prof. Success","","Pref. Interaction","","Tie Strength","",
                          "Least in Common","","Most in Common","")
colnames(plot.table) <- sapply(c("Clique","Weakest","Weak","Strong","Strongest"),function(x) paste(x,c("Controls","CEM","BART","SUR"),sep=" "))
k = l = 1
for(tie in c("clique","weakest","weak","strong","strongest")) {
  form.ctr <- outs.ctr <- form.biv <- outs.cem <- outs.bart <- outs.biv <- list(NA)
  outcomes.tie <- outcomes[which(grepl(tie,outcomes))]
  
  if(tie == "weak" | tie == "strong") {
    outcomes.tie <- outcomes.tie[which(!(grepl("est",outcomes.tie)))]  
  }
  
  for(i in 1:(length(outcomes.tie))) {
    form.ctr[[i]] <- as.formula(paste(outcomes.tie[i],"~ online+",paste(controls,collapse="+")))
    form.biv[[i]] <- as.formula(paste(outcomes.tie[i],"~ online"))
    
    mod.biv <- summary(lm(form.biv[[i]],data=dat))
    mod.ctr <- summary(lm(form.ctr[[i]],data=dat))
    mod.cem <- att(cem.dat,form.biv[[i]],data=dat)
    mod.bart <- summary(lm(form.biv[[i]],data=dat,weights=ipw))
    
    outs.biv[[i]] <- mod.biv$coefficients[2,1:2]
    outs.ctr[[i]] <- mod.ctr$coefficients[2,1:2]
    outs.cem[[i]] <- mod.cem$att.model[1:2,2]
    outs.bart[[i]] <- mod.bart$coefficients[2,1:2]

  }
  Coefobs.biv <- unlist(outs.biv)[which(names(unlist(outs.biv)) == "Estimate")]
  Coefobs.ctr <- unlist(outs.ctr)[which(names(unlist(outs.ctr)) == "Estimate")]
  Coefobs.cem <- unlist(outs.cem)[which(names(unlist(outs.cem)) == "Estimate")]
  Coefobs.bart <- unlist(outs.bart)[which(names(unlist(outs.bart)) == "Estimate")]
  
  Tobs.biv <- unlist(outs.biv)[which(names(unlist(outs.biv)) == "Estimate")]/unlist(outs.biv)[which(names(unlist(outs.biv)) == "Std. Error")]
  Tobs.ctr <- unlist(outs.ctr)[which(names(unlist(outs.ctr)) == "Estimate")]/unlist(outs.ctr)[which(names(unlist(outs.ctr)) == "Std. Error")]
  Tobs.cem <- unlist(outs.cem)[which(names(unlist(outs.cem)) == "Estimate")]/unlist(outs.cem)[which(names(unlist(outs.cem)) == "Std. Error")]
  Tobs.bart <- unlist(outs.bart)[which(names(unlist(outs.bart)) == "Estimate")]/unlist(outs.bart)[which(names(unlist(outs.bart)) == "Std. Error")]
  
  Boot.ctr <- Boot.cem <- Boot.bart <- Boot.biv <- matrix(nrow = 100,ncol = length(outcomes.tie))
  for(j in 1:100) {
    SampID <- sample(1:nrow(dat),size = nrow(dat),replace = T)
    bootdat <- dat[SampID,]
    bootcem <- cem(treatment = "online",data=bootdat,drop=todrop)
    resboot.biv <- resboot.ctr <- resboot.cem <- resboot.bart <- respval.ctr <- respval.cem <- respval.biv <- respval.bart <- matrix(nrow = length(outcomes.tie),ncol = 2)
    for (i in 1:length(outcomes.tie)){
      resboot.biv[i,] <- summary(lm(form.biv[[i]],data = bootdat))$coefficients[2,1:2]
      resboot.ctr[i,] <- summary(lm(form.ctr[[i]],data = bootdat))$coefficients[2,1:2]
      resboot.cem[i,] <- att(bootcem,form.biv[[i]],data=bootdat)[[1]][1:2,2]
      resboot.bart[i,] <- summary(lm(form.biv[[i]],data = bootdat,weights = ipw[SampID]))$coefficients[2,1:2]
    }
    if(length(resboot.biv[,1]) == length(Coefobs.biv)) {
      Boot.biv[j,] <- (resboot.biv[,1] - Coefobs.biv)/resboot.biv[,2]
    }
    if(length(resboot.ctr[,1]) == length(Coefobs.ctr)) {
      Boot.ctr[j,] <- (resboot.ctr[,1] - Coefobs.ctr)/resboot.ctr[,2]
    }
    if(length(resboot.cem[,1]) == length(Coefobs.cem)) {
      Boot.cem[j,] <- (resboot.cem[,1] - Coefobs.cem)/resboot.cem[,2]
    }
    if(length(resboot.bart[,1]) == length(Coefobs.bart)) {
      Boot.bart[j,] <- (resboot.bart[,1] - Coefobs.bart)/resboot.bart[,2]
    }
    cat(paste(j,"\n"))
  }
  
  Qmat.biv <- t(apply(Boot.biv,1,cummax))
  Qmat.ctr <- t(apply(Boot.ctr,1,cummax))
  Qmat.cem <- t(apply(Boot.cem,1,cummax))
  Qmat.bart <- t(apply(Boot.bart,1,cummax))

  pval.biv[,l] <- apply(t(matrix(rep(Tobs.biv,100),17)) <= Qmat.biv,2,mean)
  pval.ctr[,l] <- apply(t(matrix(rep(Tobs.ctr,100),17)) <= Qmat.ctr,2,mean)
  pval.cem[,l] <- apply(t(matrix(rep(Tobs.cem,100),17)) <= Qmat.cem,2,mean)
  pval.bart[,l] <- apply(t(matrix(rep(Tobs.bart,100),17)) <= Qmat.bart,2,mean)
  
  cj.tmp.ctr <- cj.tmp.cem <- cj.tmp.bart <- NA
  ref <- seq(2.5,3.5,.05)
  z = 1
  for(cj in ref) {
    cj.tmp.ctr[z] <- mean(apply(abs(Boot.ctr)>cj,1,sum)>= 1,na.rm=T)
    cj.tmp.cem[z] <- mean(apply(abs(Boot.cem)>cj,1,sum)>= 1,na.rm=T)
    cj.tmp.bart[z] <- mean(apply(abs(Boot.bart)>cj,1,sum)>= 1,na.rm=T)
    z = z+1
  }
  
  boot.ctr.t[l] <- ref[which(abs(0.05 - cj.tmp.ctr) == min(abs(0.05 - cj.tmp.ctr)))][1]
  boot.cem.t[l] <- ref[which(abs(0.05 - cj.tmp.cem) == min(abs(0.05 - cj.tmp.cem)))][1]
  boot.bart.t[l] <- ref[which(abs(0.05 - cj.tmp.bart) == min(abs(0.05 - cj.tmp.bart)))][1]
  
  outs.sur <- systemfit(form.biv,data=dat,method = "SUR")
  
  plot.table[seq(1,34,by=2),k] <- unlist(outs.ctr)[seq(1,34,by=2)]
  plot.table[seq(2,34,by=2),k] <- unlist(outs.ctr)[seq(2,34,by=2)]
  k = k+1
  plot.table[seq(1,34,by=2),k] <- unlist(outs.cem)[seq(1,34,by=2)]
  plot.table[seq(2,34,by=2),k] <- unlist(outs.cem)[seq(2,34,by=2)]
  k = k+1
  plot.table[seq(1,34,by=2),k] <- unlist(outs.bart)[seq(1,34,by=2)]
  plot.table[seq(2,34,by=2),k] <- unlist(outs.bart)[seq(2,34,by=2)]
  k = k+1
  plot.table[seq(1,34,by=2),k] <- summary(outs.sur)$coefficients[seq(2,34,by=2),1]
  plot.table[seq(2,34,by=2),k] <- summary(outs.sur)$coefficients[seq(2,34,by=2),2]
  k = k+1
  l = l+1
}

ctr.n <- summary(lm(form.ctr[[i]],data=dat))$df[2]
cem.n <- sum(att(cem.dat,form.biv[[i]],data=dat)[[2]][2,])
bart.n <- summary(lm(form.biv[[i]],data=dat,weights = ipw))$df[2]

ctr.stat <- qt((1-.025),df = ctr.n)
ctr.bonf <- qt(1-(.025/length(outcomes.tie)),df = ctr.n)
cem.stat <- qt((1-.025),df = cem.n)
cem.bonf <- qt((1-(.025/length(outcomes.tie))),df = cem.n)
bart.stat <- qt((1-.025),df = bart.n)
bart.bonf <- qt((1-(.025/length(outcomes.tie))),df = bart.n)

######################################################################################
# SI_chars_regs.pdf: Supporting Information Figure 1
######################################################################################
oldnames <- rownames(plot.table)
ords <- match(c("Political Hom.","Religious Hom.","Education Hom.","Class Hom.","Least in Common","Most in Common",
                "Contrib. to Entrep.","Garner Conts.","Pers. Gain (% $100)","Group Gain (% $100)",
                "Pers. Crisis","Pers. Success","Pref. Interaction","Tie Strength",
                "Job Search","Prof. Crisis","Prof. Success"),rownames(plot.table))
newords <- NA
for(x in ords) {
  newords <- c(newords,x,x+1)
}
newords <- newords[-1]
plot.table <- plot.table[newords,]
pdf("./1_Figures/SI_chars_regs.pdf")
  nf <- layout(matrix(c(1,2,3,4,5,6,7,7,7,7,7,7),2,6,byrow=T),widths=c(2,1.4,1.4,1.4,1.4,1.4),heights=c(7,.5))
  par(mar=c(1,.5,3,.5))
  plot(rep(0,17),1:17,type='n',xlab="",ylab="",xaxt='n',yaxt='n',bty='n')
  mtext(text = rev(rownames(plot.table[seq(1,34,by=2),])),side = 2,at = 1:17,las=2,cex=.8,line = -10)
  j <- 1
  for(tie in c("clique","weakest","weak","strong","strongest")) {
    mins <- min(plot.table[seq(1,34,by=2),j:(j+3)] - 3.1*plot.table[seq(2,34,by=2),j:(j+3)])
    maxs <- max(plot.table[seq(1,34,by=2),j:(j+3)] + 3.1*plot.table[seq(2,34,by=2),j:(j+3)])
    
    plot(plot.table[seq(1,34,by=2),1],1:17,xlim=c(mins,maxs),xaxt='n',yaxt='n',xlab="",ylab="",type='n',bty='n',main=capitalize(tie),cex.main = 1.3)
    box(col=rgb(0,0,0,.3))
    #axis(side = 1,at=seq(round(mins),round(maxs),length.out=5),labels=seq(round(mins),round(maxs),length.out=5),cex.axis=.8,col=rgb(0,0,0,.7))
    yadj <- .3
    k = 1
    for(i in j:(j+3)) {
      tstats <- abs(rev(plot.table[seq(1,34,by=2),i])/rev(plot.table[seq(2,34,by=2),i]))
      pt.cols <- ifelse(tstats > ctr.bonf,rgb(0,0,0,.5),
                        ifelse(tstats > ctr.stat,rgb(0,0,0,.3),rgb(0,0,0,.05)))
      rect(rev(plot.table[seq(1,34,by=2),i]) - ctr.bonf*rev(plot.table[seq(2,34,by=2),i]),(1:17)+yadj-.05,
               rev(plot.table[seq(1,34,by=2),i]) + ctr.bonf*rev(plot.table[seq(2,34,by=2),i]),(1:17)+yadj+.05,
           col=NA,border=pt.cols)
      rect(rev(plot.table[seq(1,34,by=2),i]) - ctr.stat*rev(plot.table[seq(2,34,by=2),i]),(1:17)+yadj-.05,
           rev(plot.table[seq(1,34,by=2),i]) + ctr.stat*rev(plot.table[seq(2,34,by=2),i]),(1:17)+yadj+.05,
           col=pt.cols,border = NA)
      points(rev(plot.table[seq(1,34,by=2),i]),(1:17)+yadj,pch=k,cex=.8,col = pt.cols)
      yadj <- yadj - .15
      k = k+1
    }
    abline(v = 0,lty=3)
    j <- j+4
  }
  
  #text(max(plot.table[1,17:20] + ctr.bonf*plot.table[2,17:20]*1.1),
  #     c(17.4,17.2,17,16.8),labels = c("Controls","CEM","BART","SUR"),adj = 0,cex=.9)
  par(mar=c(0,6,0,0))
  plot(0,type='n',xlab="",ylab="",xaxt='n',yaxt='n',bty='n')
  leg2("top",legend=c("Controls","CEM","BART","SUR","MC Significant","95% Significant"),
         pch=c(1:4,NA,NA),fill=c(NA,NA,NA,NA,"white","grey60"),fill.lty = c(0,0,0,0,1,1),
         col=c(rep("black",5),"grey50"),ncol=3,bty='n',cex = 1.2)
dev.off()

######################################################################################
# chars_adjpvals.pdf: Figure 4
######################################################################################
ords <- rev(order(apply(cbind(pval.biv[,2:5],pval.cem[,2:5],pval.ctr[,2:5],pval.bart[,2:5]),1,min)))
sizes <- ifelse(pval.bart[,1] < .05,1.2,1)

pdf("./1_Figures/chars_adjpvals.pdf")
par(mar=c(5,9,3,1))
plot(1-jitter(pval.bart[,1][ords]),1:17,xlim=c(0,1),pch=19,col=rgb(0,0,0,.2),yaxt='n',ylab="",xlab="Confidence Level",bty='n',type = 'n',
     main="Adjusted Level of Confidence")
mtext(text = oldnames[seq(1,34,by=2)][ords],side = 2,at = 1:17,las=2,cex=1,line = 1)
for(i in 2:5) {
  pvs <- jitter(pval.bart[,i],amount = .01) #ifelse(pval.bart[,i] > .5,.5,jitter(pval.bart[,i]))
  sizes <- ifelse(pvs < .05,1.3,.4)
  cols <- ifelse(pvs > .05,rgb(0,0,0,.1),rgb(0,0,0,.5))
  pchs <- ifelse(pvs < .05,21,19)
  points(1-pvs[ords],1:17,pch=pchs[ords],col="black",cex = sizes[ords])
}
for(i in 2:5) {
  pvs <- jitter(pval.ctr[,i],amount = .01) #ifelse(pval.ctr[,i] > .5,.5,jitter(pval.ctr[,i]))
  cols <- ifelse(pvs > .05,rgb(0,0,0,.1),rgb(0,0,0,.5))
  sizes <- ifelse(pvs < .05,1.3,.4)
  pchs <- ifelse(pvs < .05,21,19)
  points(1-pvs[ords],1:17,pch=pchs[ords],col="black",cex = sizes[ords])
}
for(i in 2:5) {
  pvs <- jitter(pval.biv[,i],amount = .01) #ifelse(pval.ctr[,i] > .5,.5,jitter(pval.ctr[,i]))
  sizes <- ifelse(pvs < .05,1.3,.4)
  cols <- ifelse(pvs > .05,rgb(0,0,0,.1),rgb(0,0,0,.5))
  pchs <- ifelse(pvs < .05,21,19)
  points(1-pvs[ords],1:17,pch=pchs[ords],col="black",cex = sizes[ords])
}
for(i in 2:5) {
  pvs <- jitter(pval.cem[,i],amount = .01) #ifelse(pval.ctr[,i] > .5,.5,jitter(pval.ctr[,i]))
  cols <- ifelse(pvs > .05,rgb(0,0,0,.1),rgb(0,0,0,.5))
  sizes <- ifelse(pvs < .05,1.3,.4)
  pchs <- ifelse(pvs < .05,21,19)
  points(1-pvs[ords],1:17,pch=pchs[ords],col="black",cex = sizes[ords])
}

abline(v = 0.95,lty=2)
abline(h = 1:17,lty=3,col=rgb(0,0,0,.1))
legend("bottomright",legend=c("Sig. @ 95%","Insignificant"),pch=c(21,19),pt.cex = c(1.5,.6),col="black",bty='o',bg = "white")
dev.off()

############################################################################
# chars_diffs.pdf: Figure 5
############################################################################
dat <- read.dta("./characteristics/chars_diffs.dta")
coefs <- dat[,1]
ses <- dat[,2]
y.axis <- c(1:17)
min <- -.65
max <- .65
dat$names <- c("Least in Common","Garner Conts.","Pers. Success","Education Hom.","Prof. Success","Pref. Interaction","Tie Strength","Pers. Crisis","Personal Gain (% $100)","Prof. Crisis","Job Search","Religious Hom.","Most in Common","Class Hom.","Political Hom.","Group Gain (% $100)","Contrib. to Entrep.")
var.names <- dat$names
adjust <- 0.05

pdf("./1_Figures/chars_diffs.pdf",width = 6,height=5)
par(mar=c(5,10,3,1))
plot(coefs, y.axis, type = "p", axes = F, xlab = "Online Coefficient", ylab = "", pch = 19, cex = .8, 
     xlim=c(min,max),ylim = c(.5,17.5), main = "Diff. between Weakest / Strongest")
axis(1, at = seq(min,max,by=.1),labels = seq(min,max,by=.1),tick = T,cex.axis = .8, mgp = c(2,.7,0))
axis(2, at = y.axis, label = var.names, las = 1, tick = FALSE, cex.axis =.8)#,pos=-.45)
rect(coefs-qnorm(1-0.025/length(y.axis))*ses, y.axis-.25, coefs+qnorm(1-0.025/length(y.axis))*ses, y.axis+.25,
     col=NA,border=rgb(0,0,0,.3))
rect(coefs-qnorm(.975)*ses, y.axis-.25, coefs+qnorm(.975)*ses, y.axis+.25, col=rgb(0,0,0,.3),border = NA)
abline(v=0, lty = 2) 
points(coefs, y.axis,pch=21,cex=.8, bg="white")

leg2("bottomright",legend=c("MC CI","95% CI"),
     fill=c("white","grey70"),
     border="grey60",bty='n',cex=.8,bg="white")
dev.off()

############################################################################
# structure-diff-elicited.pdf: Figure 6
############################################################################
ttest.coefs <- function(beta1,beta2,se1,se2) {
  t.stat <- (beta1-beta2)/(sqrt(se1^2 + se2^2))
  diff <- abs(beta1 - beta2)
  pval <- 2*pt(-abs(t.stat),df = 450)
  star <- paste("(",sprintf("%.2f",round(pval,2)),ifelse(pval < 0.05,"*)",")"),sep="")
  return(list(star = star,pval = pval,diff = diff))
}

scaffold <- matrix(rbind(c(0,2,0,3,1,0),c(2,0,2,1,3,1),c(0,2,0,0,1,3),c(3,1,0,0,2,0),c(1,3,1,2,0,2),c(0,1,3,0,2,0)),nrow=6,ncol=6)

dat <- read.dta("./tools/data_prepped.dta")

# Seat Assignment Differences
pdf("./1_Figures/structure-diff-elicited.pdf")
nf <- layout(matrix(1:24,6,4,byrow=T),widths=c(1.5,3,3,3))
#layout.show(nf)
for(tie in c("clique","weakest","weak","strong","strongest","you")) {
  mains <- c("","","")
  if(tie == "clique") {
    mains <- c("Online","Offline","Difference")
  }
  tie.on <- tie.off <- pvals <- diffs <- NA
  for(i in 0:5) {
    on <- dat %>% filter(online == 1)
    off <- dat %>% filter(online == 0)
    tie.on[i+1] <- mean(on[,paste("seat_mst_",tie,sep="")] == i,na.rm=T)
    tie.off[i+1] <- mean(off[,paste("seat_mst_",tie,sep="")] == i,na.rm=T)
    pvals[i+1] <- t.test(on[,paste("seat_mst_",tie,sep="")] == i,off[,paste("seat_mst_",tie,sep="")] == i)$p.value
    diffs[i+1] <- abs(tie.on[i+1] - tie.off[i+1])
  }
  par(mar=c(0,0,0,0))
  plot.new()
  text(0,.5,labels=capitalize(tie),pos = 4,cex = 1.5)
  
  par(mar=c(1,1,3,1))
  nw.graph.diff <- graph.adjacency(scaffold,weighted = T)
  V(nw.graph.diff)$color <- "black"
  norm <- (tie.on - min(tie.on))/(max(tie.on) - min(tie.on))+1
  V(nw.graph.diff)$size <- norm*20
  
  E(nw.graph.diff)$lty <- 1
  E(nw.graph.diff)$lty[which(E(nw.graph.diff)$weight == 1)] <- 3
  
  plot.igraph(nw.graph.diff,
              layout = layout.grid,
              edge.width = (E(nw.graph.diff)$weight^2)/3,
              edge.arrow.size = 0,
              edge.color = "black",
              vertex.frame.color = "black",
              vertex.label.dist = 2,
              vertex.label.degree = c(rep(pi/2,3),rep(-pi/2,3)),
              vertex.label = round(tie.on,2),
              vertex.label.color = "black",
              main = mains[1])
  
  
  nw.graph.diff <- graph.adjacency(scaffold,weighted = T)
  V(nw.graph.diff)$color <- "black"
  norm <- (tie.off - min(tie.off))/(max(tie.off) - min(tie.off))+1
  V(nw.graph.diff)$size <- norm*20
  
  E(nw.graph.diff)$lty <- 1
  E(nw.graph.diff)$lty[which(E(nw.graph.diff)$weight == 1)] <- 3
  
  plot.igraph(nw.graph.diff,
              layout = layout.grid,
              edge.width = (E(nw.graph.diff)$weight^2)/3,
              edge.arrow.size = 0,
              edge.color = "black",
              vertex.frame.color = "black",
              vertex.label.dist = 2,
              vertex.label.degree = c(rep(pi/2,3),rep(-pi/2,3)),
              vertex.label = round(tie.off,2),
              vertex.label.color = "black",
              main = mains[2])
  
  nw.graph.diff <- graph.adjacency(scaffold,weighted = T)
  V(nw.graph.diff)$color <- ifelse(pvals < 0.00028,"black",ifelse(pvals < 0.05,rgb(0,0,0,.2),"white"))
  norm <- (diffs - min(diffs))/(max(diffs) - min(diffs))+1
  V(nw.graph.diff)$size <- norm*20
  
  E(nw.graph.diff)$lty <- 1
  E(nw.graph.diff)$lty[which(E(nw.graph.diff)$weight == 1)] <- 3
  
  plot.igraph(nw.graph.diff,
              #margin=c(0,-.5,0,-.5),
              layout = layout.grid,
              edge.width = (E(nw.graph.diff)$weight^2)/3,
              edge.arrow.size = 0,
              edge.color = "black",
              vertex.frame.color = "black",
              vertex.label.dist = 2,
              vertex.label.degree = c(rep(pi/2,3),rep(-pi/2,3)),
              vertex.label = paste(sprintf("%.2f",round(diffs,2)),ifelse(pvals < 0.00028,"**",ifelse(pvals < 0.05,"*","")),sep=""),
              vertex.label.color = "black",
              main = mains[3])
}
dev.off()

extract.dimension.avgs <- function(dimension,dat) {
  dim.res <- data.frame(pos0=as.numeric(),pos1=as.numeric(),pos2=as.numeric(),pos3=as.numeric(),pos4=as.numeric(),pos5=as.numeric(),online = as.numeric())
  for(i in 1:nrow(dat)) {
    tmp <- dat %>% 
      select(contains("seat_mst")) %>% 
      select(-contains("synth"),-contains("connected")) %>% 
      slice(i)
    
    dim <- dat[i,grepl(dimension,colnames(dat))]
    
    names(dim) <- substr(names(dim),nchar(dimension)+2,100)
    dim <- dim[,which(colnames(dim) %in% c("weakest","weak","strong","strongest","clique"))]
    dim$you <- NA
    
    ties <- substr(names(tmp)[order(tmp)],10,100)
    tmp <- tmp[order(tmp)]
    names(tmp) <- ties
    
    colords <- match(names(tmp),names(dim))
    dim.res[i,] <- cbind(dim[1,colords],dat[i,"online"])
  }
  ttests <- sapply(1:6, function(y) t.test(dim.res %>% filter(online == 1) %>% select(y),dim.res %>% filter(online == 0) %>% select(y)))
  diffs <- sapply(1:6, function(y) abs(ttests[,y]$estimate[1] - ttests[,y]$estimate[2]))
  pvals <- sapply(1:6, function(y) ttests[,y]$p.value)
  return(list(dim.res,diffs,pvals))
}


############################################################################
# structure-diff-allchars.pdf: Figure 7
############################################################################
lookup <- cbind(c("jobsearch","donate","interaction","garner","contribute","personalgain","grpgain",
                  "prof_cris_synth","prof_succ_synth","pers_cris_synth","pers_succ_synth",
                  "homophily_political","homophily_religious","homophily_education","homophily_life_skills","tie"),
                c("Job Search","Donate (% $100)","Pref. Interaction","Garner Conts.","Contrib. to Entrep.","Personal Gain (% $100)","Group Gain (% $100)",
                  "Prof. Crisis","Prof. Success","Pers. Crisis","Pers. Success",
                  "Political Hom.","Religious Hom.","Education Hom.","Class Hom.","Tie Strength"))


pdf("./1_Figures/structure-diff-allchars.pdf")
  par(mfrow=c(4,4),mar=c(2,1,3,1))
  for(d in c("homophily_political","homophily_religious","homophily_education","homophily_life_skills",
             "contribute","garner","personalgain","grpgain",
             "pers_cris_synth","pers_succ_synth","interaction","tie",
             "jobsearch","prof_cris_synth","prof_succ_synth")) {
    result <- extract.dimension.avgs(d,dat)
    nw.graph.diff <- graph.adjacency(scaffold,weighted = T)
    V(nw.graph.diff)$color <- ifelse(result[[3]] < 0.00028,"black",ifelse(result[[3]] < 0.05,rgb(0,0,0,.2),"white"))
    norm <- (result[[2]] - min(result[[2]]))/(max(result[[2]]) - min(result[[2]]))+1
    V(nw.graph.diff)$size <- norm*10
    
    E(nw.graph.diff)$lty <- 1
    E(nw.graph.diff)$lty[which(E(nw.graph.diff)$weight == 1)] <- 3
    
    plot.igraph(nw.graph.diff,
                layout = layout.grid,
                edge.width = (E(nw.graph.diff)$weight^2)/3,
                edge.arrow.size = 0,
                edge.color = "black",
                vertex.frame.color = "black",
                vertex.label.dist = 1.7,
                vertex.label.degree = c(rep(pi/2,3),rep(-pi/2,3)),
                vertex.label = paste(sprintf("%.2f",round(result[[2]],2)),ifelse(result[[3]] < 0.00028,"**",
                                                                                 ifelse(result[[3]] < 0.05,"*","")),sep=""),
                vertex.label.color = "black",
                main = lookup[which(lookup[,1] == d),2])
  }
  plot.new()
  legend("top",legend=c("*  Sig @ 95%","**  Sig @ MC"),fill=c(rgb(0,0,0,.2),"black"),border=c(NA,NA),bty='n')
dev.off()
